#########################
####                 ####
####    ROC plot     ####
####                 ####
#########################




library(haven)

data <- read_dta("FigS1data.dta")

library(pROC)


# Create a matrix with the occupation dummies and one with the predicted probabilities

matrixjob <- cbind(data$job12,data$job13,data$job21,data$job22,data$job23,data$job24,data$job31,data$job32,data$job33,data$job34,data$job41,data$job42,data$job51,data$job52,data$job61,data$job71,data$job72,data$job73,data$job74,data$job81,data$job82,data$job83,data$job91,data$job93)
matrixdhat <- cbind(data$dhat12,data$dhat13,data$dhat21,data$dhat22,data$dhat23,data$dhat24,data$dhat31,data$dhat32,data$dhat33,data$dhat34,data$dhat41,data$dhat42,data$dhat51,data$dhat52,data$dhat61,data$dhat71,data$dhat72,data$dhat73,data$dhat74,data$dhat81,data$dhat82,data$dhat83,data$dhat91,data$dhat93)


# This vector let us map the position (row in the matrix), of the occupation dummies/predicted probabilities with the corresponding occupation number 

map_numb_pos <- c(12, 13, 21, 22, 23, 24, 31, 32, 33, 34, 41, 42, 51, 52, 61, 71, 72, 73, 74, 81, 82, 83, 91, 93)




# One graph with all curves:

pdf(paste("figS1", ".pdf"))


counter=1
for (i in 1:length(map_numb_pos)){
  roc(matrixjob[,i], matrixdhat[,i])
  if(counter==1){plot.boolean=FALSE}else{plot.boolean=TRUE}
  roc_plots <- roc(matrixjob[,i],
                   matrixdhat[,i], percent=TRUE,
                   # arguments for ci
                   ci=TRUE, boot.n=100, ci.alpha=0.9, stratified=FALSE,
                   # arguments for plot
                   plot=FALSE, auc.polygon=TRUE, max.auc.polygon=TRUE, grid=TRUE,
                   print.auc=TRUE, show.thres=TRUE)
  
  plot.roc(roc_plots, pch = 16, col = "grey40", add=plot.boolean)
  counter = counter+1
}
dev.off()


# Create a matrix to map occupation number with the AUC value

auc_vector <- c()
for (i in 1:length(map_numb_pos)){
  roc_i <- roc(matrixjob[,i], matrixdhat[,i])
  roc_i$auc[1]
  auc_vector[i] <- roc_i$auc[1]
}

job_auc <- cbind(map_numb_pos, auc_vector)

write.csv(job_auc, "auc value by job.csv")
write.xlsx(job_auc, "auc value by job.xlsx")

summary(job_auc[,2])


weights <- c()
for (i in 1:length(map_numb_pos)){
  weight <- table(matrixjob[,i])
  freq <- as.data.frame(weight)
  weights[i] <- freq[2,2]/nrow(data)
}

#Renormalize the vector of weights

for (i in 1:length(map_numb_pos)){
  weights[i] <- weights[i]/sum(weights)
}
sum(weights)


weighted.mean(auc_vector, weights)

job_auc_wei <- cbind(map_numb_pos, auc_vector, weights)

write.csv(job_auc_wei, "auc value and weight by job.csv")
